package com.webull.information.center.carwler.common.util.jsoup.prase_en;

import java.io.IOException;
import java.text.ParseException;
import java.util.Locale;
import java.util.Optional;
import java.util.TimeZone;

import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.webull.framework.util.UtilDate;
import com.webull.information.center.carwler.common.model.NewsInformation;
import com.webull.information.center.carwler.common.util.jsoup.HtmlBodyPrase;
import com.webull.information.center.carwler.common.util.jsoup.JsoupPraseUtil;
import com.webull.information.center.common.constants.Constants;

/**
 * HTML parser for PR Newswire (prnewswire.com) news-release pages.
 * <p>
 * Reads the {@code article.news-release} element under {@code #main} and
 * copies title, source name, publish time, body content and language into a
 * {@link NewsInformation}.
 *
 * @author shimingjun
 * @date 2016-08-23 17:55:33
 * @version 1.0
 * @since JDK 1.8
 */
public class Prnewswire_HtmlPrase implements HtmlBodyPrase {
	protected final Logger logger = LogManager.getLogger(getClass());

	/**
	 * Pattern of the publish time once the trailing zone token is cut off,
	 * e.g. {@code Aug 02, 2016, 10:44}. {@code HH} (0-23) is used because the
	 * text carries no AM/PM marker; with the 12-hour {@code hh} a value such as
	 * {@code 12:30} would be read as 00:30.
	 */
	private static final String PUBLISH_TIME_PATTERN = "MMM dd, yyyy, HH:mm";

	/**
	 * Fills {@code info} from a PR Newswire release page, for example
	 * http://www.prnewswire.com/news-releases/global-and-china-toy-industry-report-2012-186922641.html
	 * <p>
	 * Sets, when the corresponding markup is present: title (inner HTML of the
	 * first {@code h1}), source name, push time (raw text), news time (parsed
	 * date), content, and the language (defaults to English when blank). If the
	 * article element is missing the method returns without touching
	 * {@code info}. Any exception is logged at WARN level and not propagated.
	 *
	 * @param doc  parsed HTML document of the release page
	 * @param info target object that receives the extracted fields
	 */
	@Override
	public void praseNewsInfo(org.jsoup.nodes.Document doc, NewsInformation info) {
		try {
			Element article = Optional.ofNullable(doc.getElementById("main"))
					.map(main -> main.select("article.news-release").first()).orElse(null);
			if (article == null) {
				return;
			}

			// title
			Optional.ofNullable(article.select("h1").first()).map(h1 -> StringUtils.stripToNull(h1.html()))
					.ifPresent(title -> info.setTitle(title));

			// source name
			Optional.ofNullable(article.select("p.release-details>a").first())
					.map(link -> StringUtils.stripToNull(link.ownText()))
					.ifPresent(source -> info.setSourceName(source));

			// publish time
			String publishTime = extractPublishTime(article);
			if (StringUtils.isNotBlank(publishTime)) {
				info.setPushTime(publishTime);
				applyNewsTime(publishTime, info);
			}

			// body content
			info.setContent(extractContent(article));

			if (StringUtils.isBlank(info.getLanguage())) {
				info.setLanguage(Constants.lang_en);
			}
		} catch (Exception e) {
			// Keep the parser best-effort, but log with message + throwable so
			// the stack trace is not lost.
			logger.warn("Failed to parse PR Newswire release page", e);
		}
	}

	/**
	 * Returns the raw publish-time text of the release, or {@code null}/blank
	 * when none is found. Falls back to the own text of
	 * {@code p.release-details} with every "from" removed.
	 */
	private String extractPublishTime(Element article) {
		// NOTE(review): ",mb-lg" is parsed by jsoup as a second alternative that
		// matches <mb-lg> tags; it looks like a typo for ".mb-lg". Left unchanged
		// so the set of matched elements stays exactly as before.
		String publishTime = Optional
				.ofNullable(article.select("header.release-header>div>div>p.meta,mb-lg").first())
				.map(meta -> StringUtils.stripToNull(meta.html())).orElse(null);
		if (StringUtils.isBlank(publishTime)) {
			String details = Optional.ofNullable(article.select("p.release-details").first())
					.map(p -> StringUtils.stripToNull(p.ownText())).orElse("");
			publishTime = StringUtils.stripToNull(StringUtils.remove(details, "from"));
		}
		return publishTime;
	}

	/**
	 * Parses text such as {@code Aug 02, 2016, 10:44 ET} and stores the result
	 * as the news time. The last space-separated token is treated as the time
	 * zone. Parsing is best-effort: on failure the news time is left unset and
	 * the problem is logged at DEBUG level.
	 */
	private void applyNewsTime(String publishTime, NewsInformation info) {
		try {
			String dateTimePart = StringUtils.substringBeforeLast(publishTime, " ");
			String zonePart = StringUtils.substringAfterLast(publishTime, " ");
			Optional.ofNullable(
					UtilDate.parse(dateTimePart, Locale.US, resolveTimeZone(zonePart), PUBLISH_TIME_PATTERN))
					.ifPresent(parsed -> info.setNewsTime(parsed));
		} catch (Exception e) {
			logger.debug("Unable to parse publish time: " + publishTime, e);
		}
	}

	/**
	 * Maps a US zone abbreviation to a {@link TimeZone}.
	 * <p>
	 * {@link TimeZone#getTimeZone(String)} silently returns GMT for IDs it does
	 * not understand, which includes "ET" and "EDT"; those are translated to
	 * region IDs here. Anything else is passed through unchanged.
	 */
	private static TimeZone resolveTimeZone(String abbreviation) {
		if (abbreviation == null) {
			return TimeZone.getTimeZone("GMT");
		}
		switch (abbreviation.trim().toUpperCase(Locale.ROOT)) {
		case "ET":
		case "EDT":
			return TimeZone.getTimeZone("America/New_York");
		case "CT":
		case "CDT":
			return TimeZone.getTimeZone("America/Chicago");
		case "MT":
		case "MDT":
			return TimeZone.getTimeZone("America/Denver");
		case "PT":
		case "PDT":
			return TimeZone.getTimeZone("America/Los_Angeles");
		default:
			return TimeZone.getTimeZone(abbreviation);
		}
	}

	/**
	 * Builds the content HTML from the {@code p}/{@code h4} elements of the
	 * release body. {@code span} and {@code a} elements are replaced via
	 * {@link JsoupPraseUtil#replaceWithText}, the {@code class} and
	 * {@code itemprop} attributes are dropped, and paragraphs without text are
	 * skipped.
	 *
	 * @return the stripped content, or {@code null} when nothing usable was found
	 */
	private String extractContent(Element article) {
		Elements sections = article.select("section.release-body");
		if (sections.isEmpty()) {
			sections = article.select("div.container div.release-body");
		}

		StringBuilder content = new StringBuilder();
		for (Element section : sections) {
			Elements paragraphs = section.select("p,h4");
			// Walk backwards so removing an entry does not shift the indexes
			// that are still to be visited.
			for (int i = paragraphs.size() - 1; i >= 0; i--) {
				Element paragraph = paragraphs.get(i);
				JsoupPraseUtil.trimParagraph(paragraph);
				paragraph.select("span").forEach(span -> JsoupPraseUtil.replaceWithText(span));
				paragraph.removeAttr("class");
				paragraph.removeAttr("itemprop");
				paragraph.select("a").forEach(anchor -> JsoupPraseUtil.replaceWithText(anchor));
				if (!paragraph.hasText()) {
					paragraphs.remove(i);
				}
			}
			// Skip empty sections; concatenating the null returned by
			// stripToNull would put the literal text "null" into the content.
			String sectionHtml = StringUtils.stripToNull(paragraphs.outerHtml());
			if (sectionHtml != null) {
				content.append(sectionHtml).append("\r\n");
			}
		}
		return StringUtils.stripToNull(content.toString());
	}

	/**
	 * Manual smoke test: downloads one release page, parses it and prints the
	 * resulting {@link NewsInformation}. Requires network access.
	 */
	public static void main(String[] args) throws ParseException, IOException {
		// Other release pages that were used for manual checks:
		// http://www.prnewswire.com/news-releases/18th-international-photo-and-imaging-china-shanghai-opening-soon-300300526.html
		// http://www.prnewswire.com/news-releases/huaneng-power-international-inc-2016-first-quarterly-net-profit-attributable-to-shareholders-decreased-by-2158-300257527.html
		// http://www.prnewswire.com/news-releases/celestica-inc---q4--2015-financial-results-and-conference-call-564435231.html
		String url = "http://www.prnewswire.com/news-releases/global-and-china-toy-industry-report-2012-186922641.html";
		Connection connection = Jsoup.connect(url).userAgent(
				"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36")
				.header("x-client-data", RandomStringUtils.randomAlphanumeric(40));

		org.jsoup.nodes.Document doc = connection.timeout(10000).get();
		NewsInformation info = new NewsInformation();
		new Prnewswire_HtmlPrase().praseNewsInfo(doc, info);
		System.out.println(info);
	}
}
